
This tutorial uses data extracted from video footage of a soccer game that was published at https://github.com/Friends-of-Tracking-Data-FoTD/Last-Row
import numpy as np
import pandas as pd
from geopandas import GeoDataFrame, read_file
from shapely.geometry import Point, LineString, Polygon
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import movingpandas as mpd
from holoviews import opts, dim
import holoviews as hv
import warnings
warnings.simplefilter("ignore")
INFO: Missing optional dependencies. To use the trajectory smoother classes please install Stone Soup (see https://stonesoup.readthedocs.io/en/latest/#installation).
# Keyword arguments that the hvplot calls in this notebook unpack via
# ``**hvplot_defaults`` so that all interactive plots share one look.
hvplot_defaults = dict(
    line_width=5,
    frame_height=350,
    frame_width=700,
    colorbar=True,
    tiles=None,
    geo=False,
)
# Show which MovingPandas version is installed
mpd.__version__
'0.9.rc3'
from os.path import exists
from urllib.request import urlretrieve
def get_file_from_url(url):
    """Download ``url`` into the working directory unless it is already there.

    The local file name is the last segment of the URL path. If a file with
    that name already exists it is reused and nothing is downloaded.

    Parameters
    ----------
    url : str
        Address of the file to fetch.

    Returns
    -------
    str
        Name of the local file, relative to the current working directory.
    """
    from urllib.parse import urlsplit

    # Look at the path component only, so that a query string or fragment
    # (e.g. "?raw=true") never becomes part of the local file name.
    file = urlsplit(url).path.split('/')[-1]
    if not exists(file):
        urlretrieve(url, file)
    return file
def get_df_from_gh_url(url):
    """Load the CSV file behind ``url`` into a DataFrame.

    The file is obtained through ``get_file_from_url`` and the resulting
    local file is then parsed with ``pd.read_csv`` using its defaults.

    Parameters
    ----------
    url : str
        Address of a CSV file.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file.
    """
    file = get_file_from_url(url)
    return pd.read_csv(file)
# Liverpool 2019 positional data set from the Last-Row repository
input_file = "https://raw.githubusercontent.com/Friends-of-Tracking-Data-FoTD/Last-Row/master/datasets/positional_data/liverpool_2019.csv"
df = get_df_from_gh_url(input_file)

# 'Unnamed: 0' is not used anywhere below
# (presumably the row index written by the CSV export -- TODO confirm)
df = df.drop(columns=['Unnamed: 0'])

print(f'Number of records: {len(df)}')
Number of records: 74936
# Preview the first rows of the loaded records
df.head()
| bgcolor | dx | dy | edgecolor | frame | play | player | player_num | team | x | y | z | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | 0.000000 | 0.000000 | NaN | 0 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 46.394558 | 11.134454 | 0.0 |
| 1 | NaN | 0.185745 | 1.217580 | NaN | 1 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 46.580302 | 12.352034 | 0.0 |
| 2 | NaN | 0.178659 | 1.171133 | NaN | 2 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 46.758961 | 13.523166 | 0.0 |
| 3 | NaN | 0.171573 | 1.124685 | NaN | 3 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 46.930535 | 14.647852 | 0.0 |
| 4 | NaN | 0.164488 | 1.078238 | NaN | 4 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 47.095022 | 15.726090 | 0.0 |
From the metadata:
- play: the scoreline after the goal. The team who scored the goal is the one next to the brackets.
- frame: the frame number for the current location. Data provided has 20 frames per second.
- player: the id of the player. The id is consistent within a play but not between plays.
- player_num: the player jersey number. This number is the official one, and did not change for Liverpool in 2019. You can check the corresponding names at this wikipedia link.
- x, y: coordinates for the player/ball. Pitch coordinates go from 0 to 100 on each axis.
- dx, dy: change in (x,y) coordinates from last frame to current frame
- z: height, from 0 to 1.5 (only filled for the ball)
- bgcolor: the main color for the team (used as background color)
- edgecolor: the secondary color (used as edge color)
And according to https://en.wikipedia.org/wiki/Football_pitch
the preferred size for many professional teams' stadiums is 105 by 68 metres
# Distinct plays in order of first appearance; the position of a play in
# this list is what identifies it further down (e.g. plays[PLAY]).
plays = df['play'].unique().tolist()
def to_timestamp(row):
    """Turn a record's play and frame number into a synthetic timestamp.

    Every play gets its own calendar day: the first entry of the
    module-level ``plays`` list starts at 2019-01-01 12:00:00, the second
    one a day later, and so on. The frame number is then added as an offset
    from that start, at 20 frames per second.

    Parameters
    ----------
    row : object
        Record exposing ``play`` and ``frame`` attributes, e.g. a DataFrame
        row as passed by ``df.apply(..., axis=1)``.

    Returns
    -------
    datetime.datetime
        Start time of the row's play plus the frame offset.

    Raises
    ------
    ValueError
        If ``row.play`` is not contained in ``plays``.
    """
    # plays to date: shift by whole days rather than building
    # datetime(2019, 1, day), which fails once there are more than 31 plays
    first_start = datetime(2019, 1, 1, 12, 0, 0)
    start_time = first_start + timedelta(days=plays.index(row.play))
    # frames to time: 20 frames per second
    td = timedelta(milliseconds=1000 / 20 * row.frame)
    return start_time + td
# frame: the frame number for the current location. Data provided has 20 frames per second
df['time'] = df.apply(to_timestamp, axis=1)
df = df.set_index('time')

# Rescale x/y to metres. The preferred size for many professional teams'
# stadiums is 105 by 68 metres, according to https://en.wikipedia.org/wiki/Football_pitch
pitch_length, pitch_width = 105, 68
df['x'] = df['x'] / 100 * pitch_length
df['y'] = df['y'] / 100 * pitch_width
df
| bgcolor | dx | dy | edgecolor | frame | play | player | player_num | team | x | y | z | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| time | ||||||||||||
| 2019-01-01 12:00:00.000 | NaN | 0.000000 | 0.000000 | NaN | 0 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 48.714286 | 7.571429 | 0.0 |
| 2019-01-01 12:00:00.050 | NaN | 0.185745 | 1.217580 | NaN | 1 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 48.909318 | 8.399383 | 0.0 |
| 2019-01-01 12:00:00.100 | NaN | 0.178659 | 1.171133 | NaN | 2 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 49.096909 | 9.195753 | 0.0 |
| 2019-01-01 12:00:00.150 | NaN | 0.171573 | 1.124685 | NaN | 3 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 49.277061 | 9.960539 | 0.0 |
| 2019-01-01 12:00:00.200 | NaN | 0.164488 | 1.078238 | NaN | 4 | Liverpool [3] - 0 Bournemouth | 0 | NaN | NaN | 49.449774 | 10.693741 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2019-01-19 12:00:06.000 | blue | 0.000000 | 0.000000 | white | 120 | Leicester 0 - [3] Liverpool | 10267 | NaN | defense | 103.661067 | 36.529840 | 0.0 |
| 2019-01-19 12:00:06.050 | blue | 0.000000 | 0.000000 | white | 121 | Leicester 0 - [3] Liverpool | 10267 | NaN | defense | 103.661067 | 36.529840 | 0.0 |
| 2019-01-19 12:00:06.100 | blue | 0.000000 | 0.000000 | white | 122 | Leicester 0 - [3] Liverpool | 10267 | NaN | defense | 103.661067 | 36.529840 | 0.0 |
| 2019-01-19 12:00:06.150 | blue | 0.000000 | 0.000000 | white | 123 | Leicester 0 - [3] Liverpool | 10267 | NaN | defense | 103.661067 | 36.529840 | 0.0 |
| 2019-01-19 12:00:06.200 | blue | 0.000000 | 0.000000 | white | 124 | Leicester 0 - [3] Liverpool | 10267 | NaN | defense | 103.661067 | 36.529840 | 0.0 |
74936 rows × 12 columns
# Bar chart of the number of records per team value
team_counts = df['team'].value_counts()
team_counts.plot(kind='bar', title='team', figsize=(15, 3))
<AxesSubplot:title={'center':'team'}>
# Bar chart of the number of records per jersey number
player_num_counts = df['player_num'].value_counts()
player_num_counts.plot(kind='bar', title='player_num', figsize=(15, 3))
<AxesSubplot:title={'center':'player_num'}>
# Store the identifying columns as ordered categoricals
for column in ('team', 'player', 'player_num'):
    df[column] = df[column].astype('category').cat.as_ordered()
Finally, let's create trajectories:
%%time
CRS = None
traj_collection = mpd.TrajectoryCollection(df, 'player', x='x', y='y', crs=CRS)
mpd.TemporalSplitter(traj_collection).split(mode="day")
print(f"Finished creating {len(traj_collection)} trajectories")
Finished creating 364 trajectories CPU times: total: 33.6 s Wall time: 35.3 s
# Pitch outline as an explicitly closed ring of corner points
pitch_corners = [
    (0, 0),
    (0, pitch_width),
    (pitch_length, pitch_width),
    (pitch_length, 0),
    (0, 0),
]
pitch = Polygon(pitch_corners)
pitch_gdf = GeoDataFrame(pd.DataFrame([{'geometry': pitch, 'id': 1}]), crs=CRS)
plotted_pitch = pitch_gdf.hvplot(color='white', alpha=0.5)

# Overlay the trajectories with jersey number 20 on the pitch
player_20_trajs = traj_collection.filter('player_num', 20)
plotted_pitch * player_20_trajs.hvplot(**hvplot_defaults)
# Index into ``plays`` of the play to look at in detail
PLAY = 2
title = f'Play {PLAY} {plays[PLAY]}'

# Keep only the trajectories belonging to the selected play
selected_play = plays[PLAY]
play_trajs = traj_collection.filter('play', selected_play)
play_trajs
TrajectoryCollection with 20 trajectories
# Static plot of the selected play, one colour per team value
team_colors = {'attack': 'hotpink', 'defense': 'turquoise'}
play_trajs.plot(column='team', colormap=team_colors)
<AxesSubplot:>
# Generalize the play's trajectories with a time tolerance of half a second
half_second = timedelta(seconds=0.5)
generalizer = mpd.MinTimeDeltaGeneralizer(play_trajs)
generalized = generalizer.generalize(tolerance=half_second)

# Interactive plot coloured by speed
generalized.hvplot(
    title=title,
    c='speed',
    hover_cols=['player', 'team'],
    **hvplot_defaults,
)
# Same speed-coloured plot, drawn on top of the pitch polygon
speed_plot = generalized.hvplot(
    title=title,
    c='speed',
    hover_cols=['player'],
    cmap='Viridis',
    **hvplot_defaults,
)
plotted_pitch * speed_plot
# Fetch the pitch background image and stretch it over the pitch extent
get_file_from_url('https://github.com/anitagraser/movingpandas/raw/master/tutorials/data/soccer_field.png')
pitch_bounds = (0, 0, pitch_length, pitch_width)
pitch_img = hv.RGB.load_image('soccer_field.png', bounds=pitch_bounds)

# Trajectories coloured by team, plus a marker at each trajectory's start
team_plot = generalized.hvplot(
    title=title,
    c='team',
    colormap={'attack': 'limegreen', 'defense': 'purple'},
    hover_cols=['player'],
    **hvplot_defaults,
)
start_plot = generalized.get_start_locations().hvplot(label='start', color='orange')
pitch_img * team_plot * start_plot